# This code is to generate table and figures for the R&R of spell paper
# Created by:           Shuyi Qiu
# First created on:     May 4th, 2025
# Last edited on:       May 4th, 2025
# Data Source: AddData/foreg.RData, 
# Data generated from spell_data_012324.R + Collapse procedure in spell_tabfig_030224.R

rm(list = ls())
# Settings ----
set.seed(27705)
setwd("/Users/RachelChiu/Documents/ReProj/PSID/spell")
library(tidyverse)
library(ggplot2)
library(psidread)
library(dplyr)
library(haven)
library(readxl)
library(data.table)
library(Amelia)
library(janitor)
library(mitools)
library(mice)
library(weights)
library(radiant)
library(miceadds)
library(sandwich)


load("AddData/foreg.RData")

# Keep 3 substantial stats and remove the leading 0s

rmlead0 <- function(x, digits) {
  x <- round(x, digits = digits)
  
  ifelse(x < 1 & x > -1,
         sub("0\\.", ".", format(x, scientific = FALSE)),  # Escape the dot because it's a special character in regex
         format(x, scientific = FALSE))
}

# 1. Two-by-two tables & correlations for NWP and IP measures ----
reg_summary <- data.frame(term = NA, name = NA)
for (i in c("num_","pct_","num_spell_", "longest_spell_")){
  temp <- miceadds::micombine.cor(mi.res=datlist, 
                                  variables=c(paste0(i,"nwp"),paste0(i,"incp")),
                                  conf.level=0.95,
                                  method="pearson")
  reg_summary <- bind_rows(reg_summary, temp)
}

write.table(reg_summary, "Output/table1corr.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)

# 2. Logit model ----
## Table 2. Main model ----
reg_summary <- data.frame(term = NA, name = NA)
for (i in c("num_nwp","pct_nwp","num_spell_nwp", "longest_spell_nwp","consec_nwp")){
  for (j in c("hs_completed","clg_enrolled")){
    fit <- with(datlist, glm(formula = get(j) ~ get(i) + incp_prop + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial(link = "logit"), weights = pwt))
    betas <- lapply(fit, coef)
    vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist$imputations[[1]]$indfid)}) #vcovCL is in the sandwich package
    reg_temp <- summary(pool_mi(betas, vars)) %>%
      mutate(
        estimate_star = case_when(
          p < 0.1 & p >= 0.05 ~ paste(as.character(rmlead0(results,4)), "*", sep = ""),
          p < 0.05 & p >= 0.01 ~ paste(as.character(rmlead0(results,4)), "**", sep = ""),
          p < 0.01 & p >= 0 ~ paste(as.character(rmlead0(results,4)), "***", sep = ""),
          TRUE ~ as.character(rmlead0(results,4))
        ),
        stdev = paste0("(",rmlead0(se,4),")")
      ) %>%
      select(estimate_star, stdev)
    reg_temp$term <- rownames(reg_temp)
    reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
    reg_temp <- as.data.frame(reg_temp)
    reg_summary <- reg_summary %>%
      full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(i,j)))
  }
}

# write.csv(reg_summary, file = "temp.csv",quote = TRUE)

write.table(reg_summary, "Output/table2_logit_mi.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)

## Table 3. By age ----
### 3a. Percent ----
reg_summary <- data.frame(term = NA, name = NA)

for (j in c("hs_completed","clg_enrolled")){
  fit <- with(datlist, glm(formula = get(j) ~ nwp_pct0to6 + nwp_pct6to12 + nwp_pct12to18 + incp_prop + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial(link = "logit"), weights = pwt))
  betas <- lapply(fit, coef)
  vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist$imputations[[1]]$indfid)}) #vcovCL is in the sandwich package
  reg_temp <- summary(pool_mi(betas, vars)) %>%
    mutate(
      estimate_star = case_when(
        p < 0.1 & p >= 0.05 ~ paste(as.character(rmlead0(results,4)), "*", sep = ""),
        p < 0.05 & p >= 0.01 ~ paste(as.character(rmlead0(results,4)), "**", sep = ""),
        p < 0.01 & p >= 0 ~ paste(as.character(rmlead0(results,4)), "***", sep = ""),
        TRUE ~ as.character(rmlead0(results,4))
      ),
      stdev = paste0("(",rmlead0(se,4),")")
    ) %>%
    select(estimate_star, stdev)
  reg_temp$term <- rownames(reg_temp)
  reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
  reg_temp <- as.data.frame(reg_temp)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(j)))
}


write.table(reg_summary, "Output/table3pct_logit_mi.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)


### 3b. Share (Condition on Poverty) ----
# Conditional on those who ever experienced poverty for at least one wave
datlist_sub  <- subset_datlist(datlist,  expr_subset=expression(nwp_share0to6 + nwp_share6to12 + nwp_share12to18 > 0) )
reg_summary <- data.frame(term = NA, name = NA)

for (j in c("hs_completed","clg_enrolled")){
  fit <- with(datlist_sub, glm(formula = get(j) ~ nwp_share6to12 + nwp_share12to18 + incp_prop + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial(link = "logit"), weights = pwt))
  betas <- lapply(fit, coef)
  vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist_sub$imputations[[1]]$indfid)}) #vcovCL is in the sandwich package
  reg_temp <- summary(pool_mi(betas, vars)) %>%
    mutate(
      estimate_star = case_when(
        p < 0.1 & p >= 0.05 ~ paste(as.character(rmlead0(results,4)), "*", sep = ""),
        p < 0.05 & p >= 0.01 ~ paste(as.character(rmlead0(results,4)), "**", sep = ""),
        p < 0.01 & p >= 0 ~ paste(as.character(rmlead0(results,4)), "***", sep = ""),
        TRUE ~ as.character(rmlead0(results,4))
      ),
      stdev = paste0("(",rmlead0(se,4),")")
    ) %>%
    select(estimate_star, stdev)
  reg_temp$term <- rownames(reg_temp)
  reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
  reg_temp <- as.data.frame(reg_temp)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(j)))
}

write.table(reg_summary, "Output/table3share_logit_mi.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)

## Table 4. Income mirror ----
reg_summary <- data.frame(term = NA, name = NA)
for (i in c("num_","pct_","num_spell_", "longest_spell_")){
  for (j in c("hs_completed","clg_enrolled")){
    fit <- with(datlist, glm(formula = get(j) ~ get(paste0(i,"nwp")) + get(paste0(i,"incp")) + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial(link = "logit"), weights = pwt))
    betas <- lapply(fit, coef)
    vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist$imputations[[1]]$indfid)}) #vcovCL is in the sandwich package
    reg_temp <- summary(pool_mi(betas, vars)) %>%
      mutate(
        estimate_star = case_when(
          p < 0.1 & p >= 0.05 ~ paste(as.character(rmlead0(results,4)), "*", sep = ""),
          p < 0.05 & p >= 0.01 ~ paste(as.character(rmlead0(results,4)), "**", sep = ""),
          p < 0.01 & p >= 0 ~ paste(as.character(rmlead0(results,4)), "***", sep = ""),
          TRUE ~ as.character(rmlead0(results,4))
        ),
        stdev = paste0("(",rmlead0(se,4),")")
      ) %>%
      select(estimate_star, stdev)
    reg_temp$term <- rownames(reg_temp)
    reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
    reg_temp <- as.data.frame(reg_temp)
    reg_summary <- reg_summary %>%
      full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(i,j)))
  }
}

write.table(reg_summary, "Output/table4_logit_mi.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)



## Table 5. Income mirror, by age ----
### 5a. Percent ----
reg_summary <- data.frame(term = NA, name = NA)

for (j in c("hs_completed","clg_enrolled")){
  fit <- with(datlist, glm(formula = get(j) ~ nwp_pct0to6 + nwp_pct6to12 + nwp_pct12to18 + incp_pct0to6 + incp_pct6to12 + incp_pct12to18 + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial(link = "logit"), weights = pwt))
  betas <- lapply(fit, coef)
  vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist$imputations[[1]]$indfid)}) #vcovCL is in the sandwich package
  reg_temp <- summary(pool_mi(betas, vars)) %>%
    mutate(
      estimate_star = case_when(
        p < 0.1 & p >= 0.05 ~ paste(as.character(rmlead0(results,4)), "*", sep = ""),
        p < 0.05 & p >= 0.01 ~ paste(as.character(rmlead0(results,4)), "**", sep = ""),
        p < 0.01 & p >= 0 ~ paste(as.character(rmlead0(results,4)), "***", sep = ""),
        TRUE ~ as.character(rmlead0(results,4))
      ),
      stdev = paste0("(",rmlead0(se,4),")")
    ) %>%
    select(estimate_star, stdev)
  reg_temp$term <- rownames(reg_temp)
  reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
  reg_temp <- as.data.frame(reg_temp)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(j)))
}

write.table(reg_summary, "Output/table5pct_logit_mi.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)


### 5b. Share ----
datlist_sub2  <- subset_datlist(datlist,  expr_subset=expression(nwp_share0to6 + nwp_share6to12 + nwp_share12to18 > 0 & incp_share0to6 + incp_share6to12 + incp_share12to18 > 0) )
reg_summary <- data.frame(term = NA, name = NA)
for (j in c("hs_completed","clg_enrolled")){
  fit <- with(datlist, glm(formula = get(j) ~ nwp_share6to12 + nwp_share12to18 + incp_share6to12 + incp_share12to18 + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial(link = "logit"), weights = pwt))
  betas <- lapply(fit, coef)
  vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist$imputations[[1]]$indfid)}) #vcovCL is in the sandwich package
  reg_temp <- summary(pool_mi(betas, vars)) %>%
    mutate(
      estimate_star = case_when(
        p < 0.1 & p >= 0.05 ~ paste(as.character(rmlead0(results,4)), "*", sep = ""),
        p < 0.05 & p >= 0.01 ~ paste(as.character(rmlead0(results,4)), "**", sep = ""),
        p < 0.01 & p >= 0 ~ paste(as.character(rmlead0(results,4)), "***", sep = ""),
        TRUE ~ as.character(rmlead0(results,4))
      ),
      stdev = paste0("(",rmlead0(se,4),")")
    ) %>%
    select(estimate_star, stdev)
  reg_temp$term <- rownames(reg_temp)
  reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
  reg_temp <- as.data.frame(reg_temp)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(j)))
}

write.table(reg_summary, "Output/table5share_logit_mi.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)


# 3. With interaction ----
## Table 4. Income interact ----
reg_summary <- data.frame(term = NA, name = NA)
for (i in c("num_","pct_","num_spell_", "longest_spell_")){
  for (j in c("hs_completed","clg_enrolled")){
    fit <- with(datlist, glm(formula = get(j) ~ get(paste0(i,"nwp")) * get(paste0(i,"incp")) + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = gaussian, weights = pwt))
    betas <- lapply(fit, coef)
    vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist$imputations[[1]]$indfid)}) #vcovCL is in the sandwich package
    reg_temp <- summary(pool_mi(betas, vars)) %>%
      mutate(
        estimate_star = case_when(
          p < 0.1 & p >= 0.05 ~ paste(as.character(rmlead0(results,4)), "*", sep = ""),
          p < 0.05 & p >= 0.01 ~ paste(as.character(rmlead0(results,4)), "**", sep = ""),
          p < 0.01 & p >= 0 ~ paste(as.character(rmlead0(results,4)), "***", sep = ""),
          TRUE ~ as.character(rmlead0(results,4))
        ),
        stdev = paste0("(",rmlead0(se,4),")")
      ) %>%
      select(estimate_star, stdev)
    reg_temp$term <- rownames(reg_temp)
    reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
    reg_temp <- as.data.frame(reg_temp)
    reg_summary <- reg_summary %>%
      full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(i,j)))
  }
}

write.table(reg_summary, "Output/table4_lpm_mi_interact.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)

# 4. Coefficient test ----
reg_summary <- data.frame(term = NA, name = NA)
for (i in c("num_","pct_","num_spell_", "longest_spell_")){
  for (j in c("hs_completed","clg_enrolled")){
    diff_list <- lapply(datlist$imputations, function(data) {
      mod <- glm(data = data, formula = get(j) ~ get(paste0(i,"nwp")) + get(paste0(i,"incp")) + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = gaussian, weights = pwt)
      # Clustered VCOV by individual
      vcov_cl <- vcovCL(mod, cluster = datlist$imputations[[1]]$indfid)
      # Coefficient difference
      b <- coef(mod)
      nwp_name <- as.character("get(paste0(i, \"nwp\"))")
      incp_name <- as.character("get(paste0(i, \"incp\"))")
      beta_diff <- b[incp_name] - b[nwp_name]
      # Variance of the difference
      var_diff <- vcov_cl[incp_name, incp_name] +
        vcov_cl[nwp_name, nwp_name] -
        2 * vcov_cl[incp_name, nwp_name]
      c(estimate = beta_diff, variance = var_diff)
    })
    
    estimates <- lapply(diff_list, function(x) x[paste0("estimate.",as.character("get(paste0(i, \"incp\"))"))])
    variances <- lapply(diff_list, function(x) x["variance"])
    pooled <- MIcombine(results = estimates, variances = variances)
    temp <- summary(pooled) |> mutate(outcome = j)
    reg_summary <- bind_rows(reg_summary, temp)
  }
}

write.table(reg_summary, "Output/table4_test.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)


# 5. Non-missing analysis (listwise deletion) ----
# Run the lines at the beginning of spell_data_012324.R to generate the non-missing sample ID
nm_sample_id <- d |> 
  group_by(pid) |> 
  summarise(num_miss = sum(missed_year)) |> 
  filter(num_miss == 0)

# Extract any dataset and run the regression
foreg_nm <- datlist$imputations[[1]] |> 
  right_join(nm_sample_id, by = "pid")

## Table 2----
reg_summary <- data.frame(term = NA, name = NA)
for (i in c("num_nwp","pct_nwp","num_spell_nwp", "longest_spell_nwp","consec_nwp")){
  for (j in c("hs_completed","clg_enrolled")){
    fit <- glm(data = foreg_nm, formula = get(j) ~ get(i) + incp_prop + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = gaussian, weights = pwt)
    betas <- coef(fit)
    se <- sqrt(diag(vcovCL(fit, cluster = foreg_nm$indfid))) #vcovCL is in the sandwich package
    tval <- betas/se
    pval <- 2 * (1-pnorm(abs(tval)))
    reg_temp <- data.frame(
      term = names(betas),
      results = betas,
      se = se,
      p = pval
    ) %>%
      mutate(
        estimate_star = case_when(
          p < 0.1 & p >= 0.05 ~ paste(as.character(rmlead0(results,4)), "*", sep = ""),
          p < 0.05 & p >= 0.01 ~ paste(as.character(rmlead0(results,4)), "**", sep = ""),
          p < 0.01 & p >= 0 ~ paste(as.character(rmlead0(results,4)), "***", sep = ""),
          TRUE ~ as.character(rmlead0(results,4))
        ),
        stdev = paste0("(",rmlead0(se,4),")")
      ) %>%
      select(estimate_star, stdev)
    reg_temp$term <- rownames(reg_temp)
    reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
    reg_temp <- as.data.frame(reg_temp)
    reg_summary <- reg_summary %>%
      full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(i,j)))
  }
}

write.table(reg_summary, "Output/table2_lpm_nm.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)


## Table 3. By age ----
### 3a. Percent ----
reg_summary <- data.frame(term = NA, name = NA)
  for (j in c("hs_completed","clg_enrolled")){
    fit <- glm(data = foreg_nm, formula = get(j) ~ nwp_pct0to6 + nwp_pct6to12 + nwp_pct12to18 + incp_prop + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = gaussian, weights = pwt)
    betas <- coef(fit)
    se <- sqrt(diag(vcovCL(fit, cluster = foreg_nm$indfid))) #vcovCL is in the sandwich package
    tval <- betas/se
    pval <- 2 * (1-pnorm(abs(tval)))
    reg_temp <- data.frame(
      term = names(betas),
      results = betas,
      se = se,
      p = pval
    ) %>%
      mutate(
        estimate_star = case_when(
          p < 0.1 & p >= 0.05 ~ paste(as.character(rmlead0(results,4)), "*", sep = ""),
          p < 0.05 & p >= 0.01 ~ paste(as.character(rmlead0(results,4)), "**", sep = ""),
          p < 0.01 & p >= 0 ~ paste(as.character(rmlead0(results,4)), "***", sep = ""),
          TRUE ~ as.character(rmlead0(results,4))
        ),
        stdev = paste0("(",rmlead0(se,4),")")
      ) %>%
      select(estimate_star, stdev)
    reg_temp$term <- rownames(reg_temp)
    reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
    reg_temp <- as.data.frame(reg_temp)
    reg_summary <- reg_summary %>%
      full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(i,j)))
  }

write.table(reg_summary, "Output/table3apct_lpm_nm.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)

### 3b. Share ----
foreg_nm_evernwp <- foreg_nm |> 
  filter(nwp_share0to6 + nwp_share6to12 + nwp_share12to18 > 0)
reg_summary <- data.frame(term = NA, name = NA)
for (j in c("hs_completed","clg_enrolled")){
  fit <- glm(data = foreg_nm_evernwp, formula = get(j) ~ nwp_share6to12 + nwp_share12to18 + incp_prop + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = gaussian, weights = pwt)
  betas <- coef(fit)
  se <- sqrt(diag(vcovCL(fit, cluster = foreg_nm_evernwp$indfid))) #vcovCL is in the sandwich package
  tval <- betas/se
  pval <- 2 * (1-pnorm(abs(tval)))
  reg_temp <- data.frame(
    term = names(betas),
    results = betas,
    se = se,
    p = pval
  ) %>%
    mutate(
      estimate_star = case_when(
        p < 0.1 & p >= 0.05 ~ paste(as.character(rmlead0(results,4)), "*", sep = ""),
        p < 0.05 & p >= 0.01 ~ paste(as.character(rmlead0(results,4)), "**", sep = ""),
        p < 0.01 & p >= 0 ~ paste(as.character(rmlead0(results,4)), "***", sep = ""),
        TRUE ~ as.character(rmlead0(results,4))
      ),
      stdev = paste0("(",rmlead0(se,4),")")
    ) %>%
    select(estimate_star, stdev)
  reg_temp$term <- rownames(reg_temp)
  reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
  reg_temp <- as.data.frame(reg_temp)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(i,j)))
}

write.table(reg_summary, "Output/table3share_lpm_nm.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)


## Table 4. Income mirror -----
reg_summary <- data.frame(term = NA, name = NA)
for (i in c("num_","pct_","num_spell_", "longest_spell_")){
  for (j in c("hs_completed","clg_enrolled")){
    fit <- glm(data = foreg_nm, formula = get(j) ~ get(paste0(i,"nwp")) + get(paste0(i,"incp")) + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = gaussian, weights = pwt)
    betas <- coef(fit)
    se <- sqrt(diag(vcovCL(fit, cluster = foreg_nm$indfid))) #vcovCL is in the sandwich package
    tval <- betas/se
    pval <- 2 * (1-pnorm(abs(tval)))
    reg_temp <- data.frame(
      term = names(betas),
      results = betas,
      se = se,
      p = pval
    ) %>%
      mutate(
        estimate_star = case_when(
          p < 0.1 & p >= 0.05 ~ paste(as.character(rmlead0(results,4)), "*", sep = ""),
          p < 0.05 & p >= 0.01 ~ paste(as.character(rmlead0(results,4)), "**", sep = ""),
          p < 0.01 & p >= 0 ~ paste(as.character(rmlead0(results,4)), "***", sep = ""),
          TRUE ~ as.character(rmlead0(results,4))
        ),
        stdev = paste0("(",rmlead0(se,4),")")
      ) %>%
      select(estimate_star, stdev)
    reg_temp$term <- rownames(reg_temp)
    reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
    reg_temp <- as.data.frame(reg_temp)
    reg_summary <- reg_summary %>%
      full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(i,j)))
  }
}

write.table(reg_summary, "Output/table4_lpm_nm.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)

## Table 5. Income mirrow, by age ----
### 5a. Percent ----
reg_summary <- data.frame(term = NA, name = NA)
for (j in c("hs_completed","clg_enrolled")){
  fit <- glm(data = foreg_nm, formula = get(j) ~ nwp_pct0to6 + nwp_pct6to12 + nwp_pct12to18 + incp_pct0to6 + incp_pct6to12 + incp_pct12to18 + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = gaussian, weights = pwt)
  betas <- coef(fit)
  se <- sqrt(diag(vcovCL(fit, cluster = foreg_nm$indfid))) #vcovCL is in the sandwich package
  tval <- betas/se
  pval <- 2 * (1-pnorm(abs(tval)))
  reg_temp <- data.frame(
    term = names(betas),
    results = betas,
    se = se,
    p = pval
  ) %>%
    mutate(
      estimate_star = case_when(
        p < 0.1 & p >= 0.05 ~ paste(as.character(rmlead0(results,4)), "*", sep = ""),
        p < 0.05 & p >= 0.01 ~ paste(as.character(rmlead0(results,4)), "**", sep = ""),
        p < 0.01 & p >= 0 ~ paste(as.character(rmlead0(results,4)), "***", sep = ""),
        TRUE ~ as.character(rmlead0(results,4))
      ),
      stdev = paste0("(",rmlead0(se,4),")")
    ) %>%
    select(estimate_star, stdev)
  reg_temp$term <- rownames(reg_temp)
  reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
  reg_temp <- as.data.frame(reg_temp)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(i,j)))
}

write.table(reg_summary, "Output/table5apct_lpm_nm.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)

### 5b. Share ----
foreg_nm_evernwp <- foreg_nm |> 
  filter(nwp_share0to6 + nwp_share6to12 + nwp_share12to18 > 0 & incp_share0to6 + incp_share6to12 + incp_share12to18 > 0)
reg_summary <- data.frame(term = NA, name = NA)
for (j in c("hs_completed","clg_enrolled")){
  fit <- glm(data = foreg_nm_evernwp, formula = get(j) ~ nwp_share6to12 + nwp_share12to18 + incp_share6to12 + incp_share12to18 + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = gaussian, weights = pwt)
  betas <- coef(fit)
  se <- sqrt(diag(vcovCL(fit, cluster = foreg_nm_evernwp$indfid))) #vcovCL is in the sandwich package
  tval <- betas/se
  pval <- 2 * (1-pnorm(abs(tval)))
  reg_temp <- data.frame(
    term = names(betas),
    results = betas,
    se = se,
    p = pval
  ) %>%
    mutate(
      estimate_star = case_when(
        p < 0.1 & p >= 0.05 ~ paste(as.character(rmlead0(results,4)), "*", sep = ""),
        p < 0.05 & p >= 0.01 ~ paste(as.character(rmlead0(results,4)), "**", sep = ""),
        p < 0.01 & p >= 0 ~ paste(as.character(rmlead0(results,4)), "***", sep = ""),
        TRUE ~ as.character(rmlead0(results,4))
      ),
      stdev = paste0("(",rmlead0(se,4),")")
    ) %>%
    select(estimate_star, stdev)
  reg_temp$term <- rownames(reg_temp)
  reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
  reg_temp <- as.data.frame(reg_temp)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(i,j)))
}

write.table(reg_summary, "Output/table5share_lpm_nm.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)

# 6. Debt poor ----
rm(list = ls())
# Done in previous robustness check
# Need non-collapsed imputed data. Run the spell_data_012324.R
imp_long_df <- data.frame()
for (i in c(1:5)){
  cdoc_imp <- along.out$imputations[[i]] |> 
    clean_names() |> 
    mutate(.imp = i)
  
  cov_df <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_children_gt3 = ifelse(n_children >= 3, T, F),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |> 
    mutate(nwp = ifelse((tassets - tdebts + homevalue - vehicles)*Ratio21_nw <= w_threshold, T, F),
           astp = ifelse((tassets + homevalue - vehicles)*Ratio21_nw <= w_threshold, T, F),
           debtp = ifelse(astp == F & nwp == T, T, F),
           incp = ifelse(inc*Ratio21_inc <= inc_threshold, T, F)) |> 
    filter(incp == T) |> 
    group_by(pid) |> 
    slice_min(order_by = year)
  
  summary_df <- cov_df |> 
    ungroup() |>
    summarise(
      median_inc = median(inc*Ratio21_inc),
      median_nw = median((tassets - tdebts + homevalue - vehicles)*Ratio21_nw),
      median_asset = median((tassets + homevalue - vehicles) * Ratio21_nw),
      median_debt = median(tdebts * Ratio21_nw)
    ) |> 
    mutate(.imp = i,
           povtype = "incp")
  
  imp_long_df <- bind_rows(imp_long_df, summary_df)
}

View(imp_long_df)

temp <- imp_long_df |> 
  group_by(povtype) |> 
  summarise(across(everything(), ~ mean(.x, na.rm = TRUE)))

write.table(temp, "Output/table4add.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)
